/*****************************************************************************
 * Copyright (C) Codehaus.org                                                *
 * ------------------------------------------------------------------------- *
 * Licensed under the Apache License, Version 2.0 (the "License");           *
 * you may not use this file except in compliance with the License.          *
 * You may obtain a copy of the License at                                   *
 *                                                                           *
 * http://www.apache.org/licenses/LICENSE-2.0                                *
 *                                                                           *
 * Unless required by applicable law or agreed to in writing, software       *
 * distributed under the License is distributed on an "AS IS" BASIS,         *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  *
 * See the License for the specific language governing permissions and       *
 * limitations under the License.                                            *
 *****************************************************************************/
package com.googlecode.lazyparsec.pattern;

import com.googlecode.lazyparsec.util.Checks;
import com.googlecode.totallylazy.Predicate;

import java.util.regex.Matcher;

/**
 * Provides common {@link Pattern} implementations.
 *
 * @author Ben Yu
 */
public final class Patterns {

    private Patterns() {
    }

    /**
     * A {@link Pattern} that always returns {@link Pattern#MISMATCH}.
     */
    public static final Pattern NEVER = new Pattern() {
        @Override
        public int match(CharSequence src, int begin, int end) {
            return Pattern.MISMATCH;
        }
    };

    /**
     * A {@link Pattern} that always matches with match length {@code 0}.
     */
    public static final Pattern ALWAYS = new Pattern() {
        @Override
        public int match(CharSequence src, int begin, int end) {
            return 0;
        }
    };

    /**
     * A {@link Pattern} that matches any character and only mismatches for an empty string.
     */
    public static final Pattern ANY_CHAR = hasAtLeast(1);

    /**
     * A {@link Pattern} object that matches if the input has no character left. Match
     * length is {@code 0} if succeed.
     */
    public static final Pattern EOF = hasExact(0);

    /**
     * A {@link Pattern} object that succeeds with match length {@code 2} if there are at least 2
     * characters in the input and the first character is {@code '\'}. Mismatch otherwise.
     */
    public static final Pattern ESCAPED = new Pattern() {
        @Override
        public int match(CharSequence src, int begin, int end) {
            if (begin >= end - 1) return Pattern.MISMATCH;
            else if (src.charAt(begin) == '\\') return 2;
            else return Pattern.MISMATCH;
        }
    };

    /**
     * A {@link Pattern} object that matches an integer.
     */
    public static final Pattern INTEGER = many1(CharacterPredicates.IS_DIGIT);

    /**
     * A {@link Pattern} object that matches a decimal number that has at least one digit
     * before the decimal point. The decimal point and the numbers to the right are optional.
     * <p/>
     * <p> {@code 0, 11., 2.3} are all good candidates. While {@code .1, .} are not.
     */
    public static final Pattern STRICT_DECIMAL =
            isChar('-').optional().next(INTEGER.next(isChar('.').next(many(CharacterPredicates.IS_DIGIT)).optional()));

    /**
     * A {@link Pattern} object that matches a decimal point and one or more digits after it.
     */
    public static final Pattern FRACTION = isChar('-').optional().next(isChar('.').next(INTEGER));

    /**
     * A {@link Pattern} object that matches a decimal number that could start with a decimal
     * point or a digit.
     */
    public static final Pattern DECIMAL = STRICT_DECIMAL.or(FRACTION);

    /**
     * A {@link Pattern} object that matches a standard english word, which starts with either
     * an underscore or an alpha character, followed by 0 or more alphanumeric characters.
     */
    public static final Pattern WORD = isChar(CharacterPredicates.IS_ALPHA_)
            .next(isChar(CharacterPredicates.IS_ALPHA_NUMERIC_).many());

    /**
     * A {@link Pattern} object that matches an octal integer that starts with a {@code 0} and
     * is followed by 0 or more {@code [0 - 7]} characters.
     */
    public static final Pattern OCT_INTEGER =
            isChar('0').next(many(CharacterPredicates.range('0', '7')));

    /**
     * A {@link Pattern} object that matches a decimal integer, which starts with a non-zero
     * digit and is followed by 0 or more digits.
     */
    public static final Pattern DEC_INTEGER =
            sequence(range('1', '9'), many(CharacterPredicates.IS_DIGIT));

    /**
     * A {@link Pattern} object that matches a hex integer, which starts with a {@code 0x} or
     * {@code 0X}, and is followed by one or more hex digits.
     */
    public static final Pattern HEX_INTEGER =
            string("0x").or(string("0X")).next(many1(CharacterPredicates.IS_HEX_DIGIT));

    /**
     * A {@link Pattern} object that matches a scientific notation, such as {@code 1e12},
     * {@code 1.2E-1}, etc.
     */
    public static final Pattern SCIENTIFIC_NOTATION = sequence(
            DECIMAL, among("eE"), among("+-").optional(), INTEGER);


    /**
     * A {@link Pattern} object that matches any regular expression pattern string in the form
     * of {@code /some pattern here/}. {@code '\'} is used as escape character.
     */
    public static final Pattern REGEXP_PATTERN = getRegularExpressionPattern();


    /**
     * A {@link Pattern} object that matches regular expression modifiers, which is a list of
     * alpha characters.
     */
    public static final Pattern REGEXP_MODIFIERS = getModifiersPattern();

    /**
     * Returns a {@link Pattern} object that matches if the input has at least {@code n}
     * characters left. Match length is {@code n} if succeed.
     */
    public static Pattern hasAtLeast(final int n) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin + n > end) return Pattern.MISMATCH;
                else return n;
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the input has exactly {@code n}
     * characters left. Match length is {@code n} if succeed.
     */
    public static Pattern hasExact(final int n) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin + n != end) return Pattern.MISMATCH;
                else return n;
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the current character in the input is equal to
     * character {@code c}, in which case {@code 1} is returned as match length. Mismatches otherwise.
     */
    public static Pattern isChar(char c) {
        return isChar(CharacterPredicates.isChar(c));
    }

    /**
     * Returns a {@link Pattern} object that matches if the current character in the input is between
     * character {@code c1} and {@code c2}, in which case {@code 1} is returned as match length.
     */
    public static Pattern range(char c1, char c2) {
        return isChar(CharacterPredicates.range(c1, c2));
    }

    /**
     * Returns a {@link Pattern} object that matches if the current character in the input is equal to
     * any character in {@code chars}, in which case {@code 1} is returned as match length.
     */
    public static Pattern among(String chars) {
        return isChar(CharacterPredicates.among(chars));
    }

    /**
     * Returns a {@link Pattern} object that matches if the current character in the input satisfies
     * {@code predicate}, in which case {@code 1} is returned as match length.
     */
    public static Pattern isChar(final Predicate<Character> predicate) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin >= end) return Pattern.MISMATCH;
                else if (predicate.matches(src.charAt(begin))) return 1;
                else return Pattern.MISMATCH;
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches a line comment started by {@code begin}
     * and ended by {@code EOF} or {@code LF} (the line feed character).
     */
    public static Pattern lineComment(String begin) {
        return string(begin).next(many(CharacterPredicates.notChar('\n')));
    }

    /**
     * Returns a {@link Pattern} object that matches {@code string} literally.
     */
    public static Pattern string(final String string) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchString(string, src, begin, end);
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches {@code string} case insensitively.
     */
    public static Pattern stringCaseInsensitive(final String string) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchStringCaseInsensitive(string, src, begin, end);
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the input has at least 1 character and doesn't
     * match {@code string}. {@code 1} is returned as match length if succeeds.
     */
    public static Pattern notString(final String string) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin >= end) return MISMATCH;
                if (matchString(string, src, begin, end) == Pattern.MISMATCH)
                    return 1;
                else return MISMATCH;
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the input has at least 1 character and doesn't
     * match {@code string} case insensitively. {@code 1} is returned as match length if succeeds.
     */
    public static Pattern notStringCaseInsensitive(final String string) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin >= end) return MISMATCH;
                if (matchStringCaseInsensitive(string, src, begin, end) == Pattern.MISMATCH)
                    return 1;
                else return MISMATCH;
            }
        };
    }

    private static boolean compareIgnoreCase(char a, char b) {
        return Character.toLowerCase(a) == Character.toLowerCase(b);
    }

    private static int matchString(String str, CharSequence src, int begin, int end) {
        final int slen = str.length();
        if (end - begin < slen) return Pattern.MISMATCH;
        for (int i = 0; i < slen; i++) {
            final char exp = str.charAt(i);
            final char enc = src.charAt(begin + i);
            if (exp != enc) {
                return Pattern.MISMATCH;
            }
        }
        return slen;
    }

    private static int matchStringCaseInsensitive(String str, CharSequence src, int begin, int end) {
        final int slen = str.length();
        if (end - begin < slen) return Pattern.MISMATCH;
        for (int i = 0; i < slen; i++) {
            final char exp = str.charAt(i);
            final char enc = src.charAt(begin + i);
            if (!compareIgnoreCase(exp, enc)) {
                return Pattern.MISMATCH;
            }
        }
        return slen;
    }

    static Pattern not(final Pattern pp) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (pp.match(src, begin, end) != Pattern.MISMATCH) return Pattern.MISMATCH;
                else return 0;
            }
        };
    }

    static Pattern peek(final Pattern pp) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (pp.match(src, begin, end) == Pattern.MISMATCH) return Pattern.MISMATCH;
                else return 0;
            }
        };
    }

    /**
     * Returns a {@link Pattern} that matches if all of {@code patterns} matches,
     * in which case, the maximum match length is returned. Mismatch if any one mismatches.
     */
    public static Pattern and(final Pattern... patterns) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int ret = 0;
                for (Pattern pattern : patterns) {
                    int l = pattern.match(src, begin, end);
                    if (l == MISMATCH) return MISMATCH;
                    if (l > ret) ret = l;
                }
                return ret;
            }
        };
    }

    /**
     * Returns a {@link Pattern} that matches if any of {@code patterns} matches, in which case, the
     * first match length is returned. Mismatch if any one mismatches.
     */
    public static Pattern or(final Pattern... patterns) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                for (Pattern pattern : patterns) {
                    int l = pattern.match(src, begin, end);
                    if (l != MISMATCH) return l;
                }
                return MISMATCH;
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches the input against {@code patterns} sequentially.
     * Te total match length is returned if all succeed.
     */
    public static Pattern sequence(final Pattern... patterns) {
        return new Pattern() {
            @Override
            public int match(final CharSequence src, final int begin, final int end) {
                int current = begin;
                for (Pattern pattern : patterns) {
                    int l = pattern.match(src, current, end);
                    if (l == Pattern.MISMATCH) return l;
                    current += l;
                }
                return current - begin;
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the input has at least {@code n} characters
     * and the first {@code n} characters all satisfy {@code predicate}.
     */
    public static Pattern repeat(final int n, final Predicate<Character> predicate) {
        Checks.checkNonNegative(n, "n < 0");
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchRepeat(n, predicate, src, end, begin, 0);
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the input has {@code n} occurrences of
     * {@code pattern}.
     */
    static Pattern repeat(final int n, final Pattern pattern) {
        Checks.checkNonNegative(n, "n < 0");
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchRepeat(n, pattern, src, end, begin, 0);
            }
        };
    }

    /**
     * Returns a {@link Pattern} object that matches if the input starts with {@code min} or more
     * characters and all satisfy {@code predicate}.
     */
    public static Pattern many(final int min, final Predicate<Character> predicate) {
        Checks.checkMin(min);
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int minlen = matchRepeat(min, predicate, src, end, begin, 0);
                if (minlen == MISMATCH) return MISMATCH;
                return matchMany(predicate, src, end, begin + minlen, minlen);
            }
        };
    }

    /**
     * Returns a {@link Pattern} that matches 0 or more characters satisfying {@code predicate}.
     */
    public static Pattern many(final Predicate<Character> predicate) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchMany(predicate, src, end, begin, 0);
            }
        };
    }

    static Pattern many(final int min, final Pattern pattern) {
        Checks.checkMin(min);
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int minlen = matchRepeat(min, pattern, src, end, begin, 0);
                if (MISMATCH == minlen) return MISMATCH;
                return matchMany(pattern, src, end, begin + minlen, minlen);
            }
        };
    }

    static Pattern many(final Pattern pattern) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchMany(pattern, src, end, begin, 0);
            }
        };
    }

    /**
     * Returns a {@link Pattern} that matches at least {@code min} and up to {@code max} number of
     * characters satisfying {@code predicate},
     */
    public static Pattern some(final int min, final int max, final Predicate<Character> predicate) {
        Checks.checkMinMax(min, max);
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int minlen = matchRepeat(min, predicate, src, end, begin, 0);
                if (minlen == MISMATCH) return MISMATCH;
                return matchSome(max - min, predicate, src, end, begin + minlen, minlen);
            }
        };
    }

    /**
     * Returns a {@link Pattern} that matches up to {@code max} number of characters
     * satisfying {@code predicate}.
     */
    public static Pattern some(final int max, final Predicate<Character> predicate) {
        Checks.checkMax(max);
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchSome(max, predicate, src, end, begin, 0);
            }
        };
    }

    static Pattern some(final int min, final int max, final Pattern pp) {
        Checks.checkMinMax(min, max);
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int minlen = matchRepeat(min, pp, src, end, begin, 0);
                if (MISMATCH == minlen) return MISMATCH;
                return matchSome(max - min, pp, src, end, begin + minlen, minlen);
            }
        };
    }

    static Pattern some(final int max, final Pattern pp) {
        Checks.checkMax(max);
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                return matchSome(max, pp, src, end, begin, 0);
            }
        };
    }

    /**
     * Returns a {@link Pattern} that tries both {@code p1} and {@code p2}, and picks the one with the
     * longer match length. If both have the same length, {@code p1} is favored.
     */
    public static Pattern longer(Pattern p1, Pattern p2) {
        return longest(p1, p2);
    }

    /**
     * Returns a {@link Pattern} that tries all of {@code patterns}, and picks the one with the
     * longest match length. If two patterns have the same length, the first one is favored.
     */
    public static Pattern longest(final Pattern... patterns) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int r = MISMATCH;
                for (Pattern pattern : patterns) {
                    int l = pattern.match(src, begin, end);
                    if (l > r) r = l;
                }
                return r;
            }
        };
    }

    /**
     * Returns a {@link Pattern} that tries both {@code p1} and {@code p2}, and picks the one with the
     * shorter match length. If both have the same length, {@code p1} is favored.
     */
    public static Pattern shorter(Pattern p1, Pattern p2) {
        return shortest(p1, p2);
    }

    /**
     * Returns a {@link Pattern} that tries all of {@code patterns}, and picks the one with the
     * shortest match length. If two patterns have the same length, the first one is favored.
     */
    public static Pattern shortest(final Pattern... patterns) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                int r = MISMATCH;
                for (int i = 0; i < patterns.length; i++) {
                    final int l = patterns[i].match(src, begin, end);
                    if (l != MISMATCH) {
                        if (r == MISMATCH || l < r)
                            r = l;
                    }
                }
                return r;
            }
        };
    }

    static Pattern ifelse(final Pattern cond, final Pattern consequence, final Pattern alternative) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                final int conditionResult = cond.match(src, begin, end);
                if (conditionResult == MISMATCH) {
                    return alternative.match(src, begin, end);
                } else {
                    final int consequenceResult = consequence.match(src, begin + conditionResult, end);
                    if (consequenceResult == MISMATCH) return MISMATCH;
                    else return conditionResult + consequenceResult;
                }
            }
        };
    }

    /**
     * Returns a {@link Pattern} that matches 1 or more characters satisfying {@code predicate}.
     */
    public static Pattern many1(Predicate<Character> predicate) {
        return many(1, predicate);
    }

    /**
     * Adapts a regular expression pattern to a {@link Pattern}.
     */
    public static Pattern regex(final java.util.regex.Pattern p) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin > end) return Pattern.MISMATCH;
                Matcher matcher = p.matcher(src.subSequence(begin, end));
                if (matcher.lookingAt()) return matcher.end();
                return Pattern.MISMATCH;
            }
        };
    }

    /**
     * Adapts a regular expression pattern string to a {@link Pattern}.
     */
    public static Pattern regex(String s) {
        return regex(java.util.regex.Pattern.compile(s));
    }

    static Pattern optional(final Pattern pp) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                final int l = pp.match(src, begin, end);
                return (l == Pattern.MISMATCH) ? 0 : l;
            }
        };
    }

    private static int matchRepeat(
            int n, Predicate<Character> predicate, CharSequence src, int len, int from, int acc) {
        int tail = from + n;
        if (tail > len) return Pattern.MISMATCH;
        for (int i = from; i < tail; i++) {
            if (!predicate.matches(src.charAt(i))) return Pattern.MISMATCH;
        }
        return n + acc;
    }

    private static int matchRepeat(
            int n, Pattern pattern, CharSequence src, int len, int from, int acc) {
        int end = from;
        for (int i = 0; i < n; i++) {
            int l = pattern.match(src, end, len);
            if (l == Pattern.MISMATCH) return Pattern.MISMATCH;
            end += l;
        }
        return end - from + acc;
    }

    private static int matchSome(
            int max, Predicate<Character> predicate, CharSequence src, int len, int from, int acc) {
        int k = Math.min(max + from, len);
        for (int i = from; i < k; i++) {
            if (!predicate.matches(src.charAt(i))) return i - from + acc;
        }
        return k - from + acc;
    }

    private static int matchSome(
            int max, Pattern pattern, CharSequence src, int len, int from, int acc) {
        int begin = from;
        for (int i = 0; i < max; i++) {
            int l = pattern.match(src, begin, len);
            if (Pattern.MISMATCH == l) return begin - from + acc;
            begin += l;
        }
        return begin - from + acc;
    }

    private static int matchMany(
            Predicate<Character> predicate, CharSequence src, int len, int from, int acc) {
        for (int i = from; i < len; i++) {
            if (!predicate.matches(src.charAt(i))) return i - from + acc;
        }
        return len - from + acc;
    }

    private static int matchMany(Pattern pattern, CharSequence src, int len, int from, int acc) {
        for (int i = from; ; ) {
            int l = pattern.match(src, i, len);
            if (Pattern.MISMATCH == l) return i - from + acc;
            //we simply stop the loop when infinity is found. this may make the parser more user-friendly.
            if (l == 0) return i - from + acc;
            i += l;
        }
    }

    private static final Pattern getRegularExpressionPattern() {
        Pattern quote = isChar('/');
        Pattern escape = isChar('\\').next(hasAtLeast(1));
        Pattern content = or(escape, isChar(CharacterPredicates.notAmong("/\r\n\\")));
        return quote.next(content.many()).next(quote);
    }

    private static final Pattern getModifiersPattern() {
        return isChar(CharacterPredicates.IS_ALPHA).many();
    }
}
